1. Download Housing Dataset
In [16]:
import requests
# Dataset URL
url = 'https://raw.githubusercontent.com/ageron/handson-ml2/refs/heads/master/datasets/housing/housing.csv'
# Local filename to save the CSV
filename = 'housing.csv'
# Fetch and save the CSV.
# A timeout is passed explicitly: without one, requests waits indefinitely on
# a stalled connection and the cell would never finish.
response = requests.get(url, timeout=30)
if response.status_code == 200:
    # Write the raw response bytes instead of the decoded text so the file on
    # disk is exactly what the server sent (no encoding guess on the way in,
    # no platform newline translation on the way out).
    with open(filename, 'wb') as f:
        f.write(response.content)
    print(f"CSV file downloaded and saved as {filename}")
else:
    # Best-effort: report the failure but do not raise. Later cells that read
    # `filename` will only work if a previous download already produced it.
    print(f"Failed to download file. Status code: {response.status_code}")
CSV file downloaded and saved as housing.csv
2. Basic Pre-Processing
In [17]:
import pandas as pd
# Open the CSV file
housing_raw = pd.read_csv('housing.csv')
# Remove NAN.
# DataFrame.dropna() returns a new frame and leaves the original untouched, so
# the result has to be assigned; calling it as a bare statement drops nothing.
# The raw frame is kept under its own name so this cell can be re-run safely.
housing_dataset = housing_raw.dropna()
# Map Ocean Proximity
# NOTE(review): this mapping is currently unused (the line applying it is
# commented out). It lists only three categories; Series.map() turns any value
# that is not a key into NaN, so confirm it covers every distinct value of
# 'ocean_proximity' before enabling the line below.
mapping = {'NEAR BAY': 0, 'NEAR OCEAN': 1, 'INLAND': 2}
#housing_dataset['ocean_proximity_encoded'] = housing_dataset['ocean_proximity'].map(mapping)
In [18]:
import seaborn as sns
import matplotlib.pyplot as plt
# Columns compared pairwise in the grid of plots
pair_columns = [
    'longitude', 'latitude', 'housing_median_age',
    'total_rooms', 'total_bedrooms', 'population',
    'households', 'median_income', 'median_house_value',
]
# Marker styling: alpha for transparency, s for point size
marker_style = {'alpha': 0.5, 's': 20}

# Draw the pairwise plots, colouring each point by its ocean_proximity value
sns.pairplot(
    housing_dataset,
    hue='ocean_proximity',
    vars=pair_columns,
    plot_kws=marker_style,
)
# y is set slightly above 1 so the title sits just above the top row of plots
plt.suptitle('Attribute Relationships Colored by Ocean Proximity', y=1.02)
plt.show()